{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from scipy import stats\n",
    "pd.set_option('display.max_columns',None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw car-loan default dataset; the CSV is decoded as GB2312.\n",
    "# NOTE(review): absolute, machine-specific Windows path - the notebook will not\n",
    "# run on another machine until this location is adjusted.\n",
    "data = pd.read_csv(\n",
    "    r'D:\\360MoveData\\Users\\ASUS\\Desktop\\拉钩数据分析\\模块九\\作业资料下载\\车贷违约预测.csv',\n",
    "    encoding='gb2312',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 199717 entries, 0 to 199716\n",
      "Data columns (total 49 columns):\n",
      "客户编号              199717 non-null int64\n",
      "已发货款              199717 non-null int64\n",
      "资产成本              199717 non-null int64\n",
      "贷款与资产比列           199717 non-null float64\n",
      "品牌                199717 non-null int64\n",
      "骑车销售商             199717 non-null int64\n",
      "车厂                199717 non-null int64\n",
      "出生日期              199717 non-null int64\n",
      "货款日期              199717 non-null int64\n",
      "地区                199717 non-null int64\n",
      "对接员工编号            199717 non-null int64\n",
      "是否填写手机号           199717 non-null int64\n",
      "受否填写身份证           199717 non-null int64\n",
      "是否出具驾驶证           199717 non-null int64\n",
      "是否填写护照            199717 non-null int64\n",
      "信用评分              199717 non-null int64\n",
      "主账户贷款次数           199717 non-null int64\n",
      "主账户有效贷款次数         199717 non-null int64\n",
      "主账户中尚未还清有效贷款      199717 non-null int64\n",
      "主账户中已批准的贷款        199717 non-null int64\n",
      "主账户中已发放贷款         199717 non-null int64\n",
      "次账户贷款次数           199717 non-null int64\n",
      "次账户有效贷款次数         199717 non-null int64\n",
      "次账户中尚未还清有效贷款      199717 non-null int64\n",
      "次账户中已批准贷款         199717 non-null int64\n",
      "次账户中已发放贷款         199717 non-null int64\n",
      "主账户每月还款           199717 non-null int64\n",
      "次账户没用还款           199717 non-null int64\n",
      "近六个月新贷款次数         199717 non-null int64\n",
      "近六个月违约次数          199717 non-null int64\n",
      "平均贷款期限            199717 non-null int64\n",
      "第一次贷款距今时间         199717 non-null int64\n",
      "贷款查询次数            199717 non-null int64\n",
      "是否违约              199717 non-null int64\n",
      "贷款与资产比            199717 non-null float64\n",
      "贷款总次数             199717 non-null int64\n",
      "主账户无效贷款次数         199717 non-null int64\n",
      "次账户无效贷款次数         199717 non-null int64\n",
      "无效贷款总次数           199717 non-null int64\n",
      "尚未还清有效贷款总额        199717 non-null int64\n",
      "已批准贷款总额           199717 non-null int64\n",
      "已发放贷款总额           199717 non-null int64\n",
      "每月还款总额            199717 non-null int64\n",
      "贷款与已还贷款比列         199717 non-null float64\n",
      "主账户还款期数           199717 non-null int64\n",
      "次账户还款期数           199717 non-null int64\n",
      "贷款与已批准贷款比列        199717 non-null float64\n",
      "总贷款次数与总有效贷款次数比    199717 non-null float64\n",
      "工作类型              199717 non-null int64\n",
      "dtypes: float64(5), int64(44)\n",
      "memory usage: 74.7 MB\n"
     ]
    }
   ],
   "source": [
    "# Print the index range, per-column non-null counts and dtypes, and memory usage.\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>客户编号</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>5.356909e+05</td>\n",
       "      <td>6.819341e+04</td>\n",
       "      <td>4.174280e+05</td>\n",
       "      <td>476762.000000</td>\n",
       "      <td>535571.000000</td>\n",
       "      <td>594571.000000</td>\n",
       "      <td>6.710840e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已发货款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>5.425627e+04</td>\n",
       "      <td>1.297766e+04</td>\n",
       "      <td>1.332000e+04</td>\n",
       "      <td>46977.000000</td>\n",
       "      <td>53703.000000</td>\n",
       "      <td>60247.000000</td>\n",
       "      <td>9.905720e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>资产成本</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>7.582391e+04</td>\n",
       "      <td>1.892894e+04</td>\n",
       "      <td>3.700000e+04</td>\n",
       "      <td>65714.000000</td>\n",
       "      <td>70922.000000</td>\n",
       "      <td>79159.000000</td>\n",
       "      <td>1.628992e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>7.464396e+01</td>\n",
       "      <td>1.149048e+01</td>\n",
       "      <td>1.003000e+01</td>\n",
       "      <td>68.730000</td>\n",
       "      <td>76.670000</td>\n",
       "      <td>83.590000</td>\n",
       "      <td>9.500000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>品牌</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>7.269851e+01</td>\n",
       "      <td>6.970618e+01</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>61.000000</td>\n",
       "      <td>130.000000</td>\n",
       "      <td>2.610000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>骑车销售商</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.963405e+04</td>\n",
       "      <td>3.493655e+03</td>\n",
       "      <td>1.052400e+04</td>\n",
       "      <td>16505.000000</td>\n",
       "      <td>20333.000000</td>\n",
       "      <td>23000.000000</td>\n",
       "      <td>2.480300e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>车厂</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>6.908577e+01</td>\n",
       "      <td>2.212829e+01</td>\n",
       "      <td>4.500000e+01</td>\n",
       "      <td>48.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>1.560000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>出生日期</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.983877e+03</td>\n",
       "      <td>9.805565e+00</td>\n",
       "      <td>1.949000e+03</td>\n",
       "      <td>1977.000000</td>\n",
       "      <td>1986.000000</td>\n",
       "      <td>1992.000000</td>\n",
       "      <td>2.000000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>货款日期</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.018000e+03</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.018000e+03</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2018.000000</td>\n",
       "      <td>2.018000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>地区</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>7.245222e+00</td>\n",
       "      <td>4.481338e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>2.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>对接员工编号</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.547858e+03</td>\n",
       "      <td>9.749015e+02</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>712.000000</td>\n",
       "      <td>1449.000000</td>\n",
       "      <td>2357.000000</td>\n",
       "      <td>3.795000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否填写手机号</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>受否填写身份证</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.334804e-02</td>\n",
       "      <td>1.510067e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否填写护照</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.143032e-03</td>\n",
       "      <td>4.624338e-02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>信用评分</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.917625e+02</td>\n",
       "      <td>3.393176e+02</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>680.000000</td>\n",
       "      <td>8.900000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户贷款次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.464037e+00</td>\n",
       "      <td>5.283968e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.530000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户有效贷款次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.048414e+00</td>\n",
       "      <td>1.951018e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.440000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中尚未还清有效贷款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.687286e+05</td>\n",
       "      <td>9.638043e+05</td>\n",
       "      <td>-6.678296e+06</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>35899.000000</td>\n",
       "      <td>9.652492e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中已批准的贷款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.224323e+05</td>\n",
       "      <td>2.522528e+06</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>64000.000000</td>\n",
       "      <td>1.000000e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户中已发放贷款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.220420e+05</td>\n",
       "      <td>2.525814e+06</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>62000.000000</td>\n",
       "      <td>1.000000e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户贷款次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>5.952423e-02</td>\n",
       "      <td>6.306478e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户有效贷款次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.768918e-02</td>\n",
       "      <td>3.144277e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.600000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中尚未还清有效贷款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>5.583871e+03</td>\n",
       "      <td>1.686728e+05</td>\n",
       "      <td>-5.746470e+05</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.603285e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中已批准贷款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>7.490970e+03</td>\n",
       "      <td>1.818362e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.688820e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户中已发放贷款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>7.374478e+03</td>\n",
       "      <td>1.812332e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.688820e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户每月还款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.314415e+04</td>\n",
       "      <td>1.524289e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2000.000000</td>\n",
       "      <td>2.564281e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户没用还款</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>3.013734e+02</td>\n",
       "      <td>1.304531e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.246710e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>近六个月新贷款次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>3.850699e-01</td>\n",
       "      <td>9.573387e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>3.500000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>近六个月违约次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>9.595578e-02</td>\n",
       "      <td>3.809351e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>平均贷款期限</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>8.058107e+00</td>\n",
       "      <td>1.386076e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>13.000000</td>\n",
       "      <td>1.170000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>第一次贷款距今时间</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.319088e+01</td>\n",
       "      <td>2.115686e+01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>1.170000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款查询次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.033377e-01</td>\n",
       "      <td>6.940867e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.800000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>是否违约</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.773910e-01</td>\n",
       "      <td>3.820002e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与资产比</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>7.235745e-01</td>\n",
       "      <td>1.136129e-01</td>\n",
       "      <td>9.463821e-02</td>\n",
       "      <td>0.664431</td>\n",
       "      <td>0.741715</td>\n",
       "      <td>0.809512</td>\n",
       "      <td>9.379874e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款总次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.523561e+00</td>\n",
       "      <td>5.356066e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.530000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户无效贷款次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.415623e+00</td>\n",
       "      <td>4.038380e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.510000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户无效贷款次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>3.183505e-02</td>\n",
       "      <td>4.127953e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>4.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>无效贷款总次数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.447458e+00</td>\n",
       "      <td>4.075544e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>4.510000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.743125e+05</td>\n",
       "      <td>9.813640e+05</td>\n",
       "      <td>-6.678296e+06</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>38189.000000</td>\n",
       "      <td>9.652492e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.299233e+05</td>\n",
       "      <td>2.530977e+06</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>67206.000000</td>\n",
       "      <td>1.000000e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.294165e+05</td>\n",
       "      <td>2.534185e+06</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>65085.000000</td>\n",
       "      <td>1.000000e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>每月还款总额</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.344553e+04</td>\n",
       "      <td>1.531618e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2094.000000</td>\n",
       "      <td>2.564281e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>inf</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.100003e+05</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.260000</td>\n",
       "      <td>inf</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>主账户还款期数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>5.059582e+04</td>\n",
       "      <td>2.275670e+06</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>1.000000e+09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>次账户还款期数</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>2.928000e+03</td>\n",
       "      <td>1.065410e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.980000e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>5.535709e+02</td>\n",
       "      <td>1.141343e+05</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>5.000000e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>1.438913e+00</td>\n",
       "      <td>7.922133e-01</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.670000</td>\n",
       "      <td>1.800000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>工作类型</th>\n",
       "      <td>199717.0</td>\n",
       "      <td>4.874748e-01</td>\n",
       "      <td>5.619145e-01</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000e+00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   count          mean           std           min  \\\n",
       "客户编号            199717.0  5.356909e+05  6.819341e+04  4.174280e+05   \n",
       "已发货款            199717.0  5.425627e+04  1.297766e+04  1.332000e+04   \n",
       "资产成本            199717.0  7.582391e+04  1.892894e+04  3.700000e+04   \n",
       "贷款与资产比列         199717.0  7.464396e+01  1.149048e+01  1.003000e+01   \n",
       "品牌              199717.0  7.269851e+01  6.970618e+01  1.000000e+00   \n",
       "骑车销售商           199717.0  1.963405e+04  3.493655e+03  1.052400e+04   \n",
       "车厂              199717.0  6.908577e+01  2.212829e+01  4.500000e+01   \n",
       "出生日期            199717.0  1.983877e+03  9.805565e+00  1.949000e+03   \n",
       "货款日期            199717.0  2.018000e+03  0.000000e+00  2.018000e+03   \n",
       "地区              199717.0  7.245222e+00  4.481338e+00  1.000000e+00   \n",
       "对接员工编号          199717.0  1.547858e+03  9.749015e+02  1.000000e+00   \n",
       "是否填写手机号         199717.0  1.000000e+00  0.000000e+00  1.000000e+00   \n",
       "受否填写身份证         199717.0  1.000000e+00  0.000000e+00  1.000000e+00   \n",
       "是否出具驾驶证         199717.0  2.334804e-02  1.510067e-01  0.000000e+00   \n",
       "是否填写护照          199717.0  2.143032e-03  4.624338e-02  0.000000e+00   \n",
       "信用评分            199717.0  2.917625e+02  3.393176e+02  0.000000e+00   \n",
       "主账户贷款次数         199717.0  2.464037e+00  5.283968e+00  0.000000e+00   \n",
       "主账户有效贷款次数       199717.0  1.048414e+00  1.951018e+00  0.000000e+00   \n",
       "主账户中尚未还清有效贷款    199717.0  1.687286e+05  9.638043e+05 -6.678296e+06   \n",
       "主账户中已批准的贷款      199717.0  2.224323e+05  2.522528e+06  0.000000e+00   \n",
       "主账户中已发放贷款       199717.0  2.220420e+05  2.525814e+06  0.000000e+00   \n",
       "次账户贷款次数         199717.0  5.952423e-02  6.306478e-01  0.000000e+00   \n",
       "次账户有效贷款次数       199717.0  2.768918e-02  3.144277e-01  0.000000e+00   \n",
       "次账户中尚未还清有效贷款    199717.0  5.583871e+03  1.686728e+05 -5.746470e+05   \n",
       "次账户中已批准贷款       199717.0  7.490970e+03  1.818362e+05  0.000000e+00   \n",
       "次账户中已发放贷款       199717.0  7.374478e+03  1.812332e+05  0.000000e+00   \n",
       "主账户每月还款         199717.0  1.314415e+04  1.524289e+05  0.000000e+00   \n",
       "次账户没用还款         199717.0  3.013734e+02  1.304531e+04  0.000000e+00   \n",
       "近六个月新贷款次数       199717.0  3.850699e-01  9.573387e-01  0.000000e+00   \n",
       "近六个月违约次数        199717.0  9.595578e-02  3.809351e-01  0.000000e+00   \n",
       "平均贷款期限          199717.0  8.058107e+00  1.386076e+01  0.000000e+00   \n",
       "第一次贷款距今时间       199717.0  1.319088e+01  2.115686e+01  0.000000e+00   \n",
       "贷款查询次数          199717.0  2.033377e-01  6.940867e-01  0.000000e+00   \n",
       "是否违约            199717.0  1.773910e-01  3.820002e-01  0.000000e+00   \n",
       "贷款与资产比          199717.0  7.235745e-01  1.136129e-01  9.463821e-02   \n",
       "贷款总次数           199717.0  2.523561e+00  5.356066e+00  0.000000e+00   \n",
       "主账户无效贷款次数       199717.0  1.415623e+00  4.038380e+00  0.000000e+00   \n",
       "次账户无效贷款次数       199717.0  3.183505e-02  4.127953e-01  0.000000e+00   \n",
       "无效贷款总次数         199717.0  1.447458e+00  4.075544e+00  0.000000e+00   \n",
       "尚未还清有效贷款总额      199717.0  1.743125e+05  9.813640e+05 -6.678296e+06   \n",
       "已批准贷款总额         199717.0  2.299233e+05  2.530977e+06  0.000000e+00   \n",
       "已发放贷款总额         199717.0  2.294165e+05  2.534185e+06  0.000000e+00   \n",
       "每月还款总额          199717.0  1.344553e+04  1.531618e+05  0.000000e+00   \n",
       "贷款与已还贷款比列       199717.0           inf           NaN -1.100003e+05   \n",
       "主账户还款期数         199717.0  5.059582e+04  2.275670e+06  0.000000e+00   \n",
       "次账户还款期数         199717.0  2.928000e+03  1.065410e+05  0.000000e+00   \n",
       "贷款与已批准贷款比列      199717.0  5.535709e+02  1.141343e+05  0.000000e+00   \n",
       "总贷款次数与总有效贷款次数比  199717.0  1.438913e+00  7.922133e-01  1.000000e+00   \n",
       "工作类型            199717.0  4.874748e-01  5.619145e-01  0.000000e+00   \n",
       "\n",
       "                          25%            50%            75%           max  \n",
       "客户编号            476762.000000  535571.000000  594571.000000  6.710840e+05  \n",
       "已发货款             46977.000000   53703.000000   60247.000000  9.905720e+05  \n",
       "资产成本             65714.000000   70922.000000   79159.000000  1.628992e+06  \n",
       "贷款与资产比列             68.730000      76.670000      83.590000  9.500000e+01  \n",
       "品牌                  14.000000      61.000000     130.000000  2.610000e+02  \n",
       "骑车销售商            16505.000000   20333.000000   23000.000000  2.480300e+04  \n",
       "车厂                  48.000000      86.000000      86.000000  1.560000e+02  \n",
       "出生日期              1977.000000    1986.000000    1992.000000  2.000000e+03  \n",
       "货款日期              2018.000000    2018.000000    2018.000000  2.018000e+03  \n",
       "地区                   4.000000       6.000000      10.000000  2.200000e+01  \n",
       "对接员工编号             712.000000    1449.000000    2357.000000  3.795000e+03  \n",
       "是否填写手机号              1.000000       1.000000       1.000000  1.000000e+00  \n",
       "受否填写身份证              1.000000       1.000000       1.000000  1.000000e+00  \n",
       "是否出具驾驶证              0.000000       0.000000       0.000000  1.000000e+00  \n",
       "是否填写护照               0.000000       0.000000       0.000000  1.000000e+00  \n",
       "信用评分                 0.000000      14.000000     680.000000  8.900000e+02  \n",
       "主账户贷款次数              0.000000       1.000000       3.000000  4.530000e+02  \n",
       "主账户有效贷款次数            0.000000       0.000000       1.000000  1.440000e+02  \n",
       "主账户中尚未还清有效贷款         0.000000       0.000000   35899.000000  9.652492e+07  \n",
       "主账户中已批准的贷款           0.000000       0.000000   64000.000000  1.000000e+09  \n",
       "主账户中已发放贷款            0.000000       0.000000   62000.000000  1.000000e+09  \n",
       "次账户贷款次数              0.000000       0.000000       0.000000  5.200000e+01  \n",
       "次账户有效贷款次数            0.000000       0.000000       0.000000  3.600000e+01  \n",
       "次账户中尚未还清有效贷款         0.000000       0.000000       0.000000  3.603285e+07  \n",
       "次账户中已批准贷款            0.000000       0.000000       0.000000  2.688820e+07  \n",
       "次账户中已发放贷款            0.000000       0.000000       0.000000  2.688820e+07  \n",
       "主账户每月还款              0.000000       0.000000    2000.000000  2.564281e+07  \n",
       "次账户没用还款              0.000000       0.000000       0.000000  3.246710e+06  \n",
       "近六个月新贷款次数            0.000000       0.000000       0.000000  3.500000e+01  \n",
       "近六个月违约次数             0.000000       0.000000       0.000000  2.000000e+01  \n",
       "平均贷款期限               0.000000       0.000000      13.000000  1.170000e+02  \n",
       "第一次贷款距今时间            0.000000       0.000000      20.000000  1.170000e+02  \n",
       "贷款查询次数               0.000000       0.000000       0.000000  2.800000e+01  \n",
       "是否违约                 0.000000       0.000000       0.000000  1.000000e+00  \n",
       "贷款与资产比               0.664431       0.741715       0.809512  9.379874e-01  \n",
       "贷款总次数                0.000000       1.000000       3.000000  4.530000e+02  \n",
       "主账户无效贷款次数            0.000000       0.000000       1.000000  4.510000e+02  \n",
       "次账户无效贷款次数            0.000000       0.000000       0.000000  4.200000e+01  \n",
       "无效贷款总次数              0.000000       0.000000       1.000000  4.510000e+02  \n",
       "尚未还清有效贷款总额           0.000000       0.000000   38189.000000  9.652492e+07  \n",
       "已批准贷款总额              0.000000       0.000000   67206.000000  1.000000e+09  \n",
       "已发放贷款总额              0.000000       0.000000   65085.000000  1.000000e+09  \n",
       "每月还款总额               0.000000       0.000000    2094.000000  2.564281e+07  \n",
       "贷款与已还贷款比列            1.000000       1.000000       1.260000           inf  \n",
       "主账户还款期数              0.000000       0.000000      25.000000  1.000000e+09  \n",
       "次账户还款期数              0.000000       0.000000       0.000000  1.980000e+07  \n",
       "贷款与已批准贷款比列           1.000000       1.000000       1.000000  5.000000e+07  \n",
       "总贷款次数与总有效贷款次数比       1.000000       1.000000       1.670000  1.800000e+01  \n",
       "工作类型                 0.000000       0.000000       1.000000  2.000000e+00  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column summary statistics, transposed so that each column of `data` is shown as a row.\n",
    "data.describe().transpose()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## 异常值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'whiskers': [<matplotlib.lines.Line2D at 0x19b62ef0>,\n",
       "  <matplotlib.lines.Line2D at 0x19b76278>],\n",
       " 'caps': [<matplotlib.lines.Line2D at 0x19b765c0>,\n",
       "  <matplotlib.lines.Line2D at 0x19b76908>],\n",
       " 'boxes': [<matplotlib.lines.Line2D at 0x19b62ba8>],\n",
       " 'medians': [<matplotlib.lines.Line2D at 0x19b76c50>],\n",
       " 'fliers': [<matplotlib.lines.Line2D at 0x19b76f98>],\n",
       " 'means': []}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEDCAYAAAAlRP8qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAN1ElEQVR4nO3dX4xc91mH8eebtVMu+k+qF0Rip47ALbZWhcIoRdQSsVokpxexoC2KRSWKVvUNWS5okVItSqsgC9EiVcJNAQtHgUpsSHsBVmXIRXFVjJoqa5VWcVZGVkrxYkS2TUnURCGO8nKxk3SyHu+edWd3vL8+HynynHN+mXkvnCfHZ33OpKqQJG19N4x7AEnSaBh0SWqEQZekRhh0SWqEQZekRhh0SWrEWIOe5IEkTyV5vMPatyb5cpJvJflKkp2bMaMkbRXjPkN/EDjYce2fAn9TVe8A7gP+eKOGkqStaKxBr6qvAk8P7kvyM0n+KcnZJP+S5Of6h/YBX+6/Pg0c2sRRJem6N+4z9GGOAzNV9UvAx4DP9fd/E3h///WvA29I8pYxzCdJ16Vt4x5gUJLXA78CfCHJK7tf1//1Y8Bnk3wY+CrwX8BLmz2jJF2vrqugs/wnhv+tql9YeaCqLgG/Aa+G//1V9cwmzydJ163r6pJLVT0LfDvJBwGy7Of7r3ckeWXejwMPjGlMSboujfuvLc4BXwPenmQxyTTwW8B0km8C5/jhDz9vB84n+Xfgp4CjYxhZkq5b8fG5ktSG6+qSiyTp2o3th6I7duyo3bt3j+vjJWlLOnv27HeranLYsbEFfffu3czPz4/r4yVpS0rynasd85KLJDXCoEtSIwy6JDXCoEtSIwy6JDVizaCv9SUU/dvz/yzJhf6XT/zi6MeUNt7c3BxTU1NMTEwwNTXF3NzcuEeS1qXLGfqDrP4lFHcAe/r/HAH+/EcfS9pcc3NzzM7OcuzYMV544QWOHTvG7OysUdeWsmbQh30JxQqHWP4moaqqR4E3J/npUQ0obYajR49y4sQJDhw4wPbt2zlw4AAnTpzg6FEfGaStYxTX0G8GLg5sL/b3XSHJkSTzSeaXlpZG8NHSaCwsLLB///7X7Nu/fz8LCwtjmkhav1EEPUP2DX3iV1Udr6peVfUmJ4feuSqNxd69ezlz5sxr9p05c4a9e/eOaSJp/UYR9EVg18D2TuDSCN5X2jSzs7NMT09z+vRpLl++zOnTp5menmZ2dnbco0mdjeJZLieBu5M8BLwLeKaq/nsE7yttmsOHDwMwMzPDwsICe/fu5ejRo6/ul7aCNZ+H3v8SituBHcD/AJ8AtgNU1V9k+cs/P8vy34R5HvidqlrzqVu9Xq98OJckrU+Ss1XVG3ZszTP0qlr1FKWW/4/wu9c4myRpRLxTVJIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqREGXZIa0SnoSQ4mOZ/kQpJ7hhy/JcnpJN9I8q0k7xv9qJKk1awZ9CQTwP3AHcA+4HCSfSuW/SHwcFW9E7gL+NyoB5Ukra7LGfptwIWqerKqXgQeAg6tWFPAG/uv3wRcGt2IkqQuugT9ZuDiwPZif9+gTwIfSrIInAJmhr1RkiNJ5pPMLy0tXcO4kqSr6RL0DNlXK7YPAw9W1U7gfcDnk1zx3lV1vKp6VdWbnJxc/7SSpKvqEvRFYNfA9k6uvKQyDTwMUFVfA34C2DGKASVJ3XQJ+mPAniS3JrmR5R96nlyx5j+B9wAk2cty0L2mIkmbaM2gV9VLwN3AI8ACy3+b5VyS+5Lc2V/2UeAjSb4JzAEfrqqVl2UkSRtoW5dF
VXWK5R92Du67d+D1E8C7RzuaJGk9vFNUkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEZ2CnuRgkvNJLiS55yprfjPJE0nOJfnb0Y4pSVrLtrUWJJkA7gd+DVgEHktysqqeGFizB/g48O6q+n6Sn9yogSVJw3U5Q78NuFBVT1bVi8BDwKEVaz4C3F9V3weoqqdGO6YkaS1dgn4zcHFge7G/b9DbgLcl+dckjyY5OOyNkhxJMp9kfmlp6domliQN1SXoGbKvVmxvA/YAtwOHgb9K8uYr/qWq41XVq6re5OTkemeVJK2iS9AXgV0D2zuBS0PW/ENVXa6qbwPnWQ68JGmTdAn6Y8CeJLcmuRG4Czi5Ys3fAwcAkuxg+RLMk6McVJK0ujWDXlUvAXcDjwALwMNVdS7JfUnu7C97BPhekieA08AfVNX3NmpoSdKVUrXycvjm6PV6NT8/P5bPlqStKsnZquoNO+adopLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUCIMuSY0w6JLUiE5BT3IwyfkkF5Lcs8q6DySpJL3RjShJ6mLNoCeZAO4H7gD2AYeT7Buy7g3A7wFfH/WQkqS1dTlDvw24UFVPVtWLwEPAoSHr/gj4FPDCCOeTJHXUJeg3AxcHthf7+16V5J3Arqr60mpvlORIkvkk80tLS+seVpJ0dV2CniH76tWDyQ3AZ4CPrvVGVXW8qnpV1ZucnOw+pSRpTV2CvgjsGtjeCVwa2H4DMAV8Jcl/AL8MnPQHo5K0uboE/TFgT5Jbk9wI3AWcfOVgVT1TVTuqandV7QYeBe6sqvkNmViSNNSaQa+ql4C7gUeABeDhqjqX5L4kd270gJKkbrZ1WVRVp4BTK/bde5W1t//oY0mS1ss7RSWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhrRKehJDiY5n+RCknuGHP/9JE8k+VaSLyd56+hHlSStZs2gJ5kA7gfuAPYBh5PsW7HsG0Cvqt4BfBH41KgHlSStrssZ+m3Ahap6sqpeBB4CDg0uqKrTVfV8f/NRYOdox5QkraVL0G8GLg5sL/b3Xc008I/DDiQ5kmQ+yfzS0lL3KSVJa+oS9AzZV0MXJh8CesCnhx2vquNV1auq3uTkZPcpJUlr2tZhzSKwa2B7J3Bp5aIk7wVmgV+tqv8bzXiSpK66nKE/BuxJcmuSG4G7gJODC5K8E/hL4M6qemr0Y0qS1rJm0KvqJeBu4BFgAXi4qs4luS/Jnf1lnwZeD3whyb8lOXmVt5MkbZAul1yoqlPAqRX77h14/d4RzyVJWifvFJWkRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZekRhh0SWqEQZf65ubmmJqaYmJigqmpKebm5sY9krQunZ7lIrVubm6O2dlZTpw4wf79+zlz5gzT09MAHD58eMzTSd2kauh3VWy4Xq9X8/PzY/lsaaWpqSmOHTvGgQMHXt13+vRpZmZmePzxx8c4mfRaSc5WVW/YMS+5SMDCwgKLi4uvueSyuLjIwsLCuEeTOjPoEnDTTTcxMzPDc889B8Bzzz3HzMwM
N91005gnk7rzGroEPP/88zz77LP84Ac/4OWXX+bixYu8/PLLTExMjHs0qTODLgFPP/30Ffuqauh+6XrlJRepb/v27dxww/J/EjfccAPbt28f80TS+niGLvVdvnx56Gtpq/AMXRqQ5DW/SluJQZcGvHJfxrjuz5B+FAZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEQZdkhph0CWpEZ2CnuRgkvNJLiS5Z8jx1yX5u/7xryfZPepBJUmrWzPoSSaA+4E7gH3A4ST7ViybBr5fVT8LfAb4k1EPKklaXZcz9NuAC1X1ZFW9CDwEHFqx5hDw1/3XXwTeEx9XJ0mbqsvz0G8GLg5sLwLvutqaqnopyTPAW4DvDi5KcgQ4AnDLLbdc48j6sfbJN23I29Yn3rjpn7n83s9s3Hvrx06XoA870175bNEua6iq48BxgF6v5/NJtX4bFMDV/kDpo3S1VXS55LII7BrY3glcutqaJNuANwF+GaMkbaIuQX8M2JPk1iQ3AncBJ1esOQn8dv/1B4B/Lk9rtIVc7berv421lax5yaV/Tfxu4BFgAnigqs4luQ+Yr6qTwAng80kusHxmftdGDi1tBOOtra7Tl0RX1Sng1Ip99w68fgH44GhHkySth3eKSlIjDLokNcKgS1IjDLokNSLj+sl+kiXgO2P5cGl1O1hxl7N0HXlrVU0OOzC2oEvXqyTzVdUb9xzSennJRZIaYdAlqREGXbrS8XEPIF0Lr6FLUiM8Q5ekRhh0SWqEQZf6kjyQ5Kkkj497FulaGHTphx4EDo57COlaGXSpr6q+it+0pS3MoEtSIwy6JDXCoEtSIwy6JDXCoEt9SeaArwFvT7KYZHrcM0nr4a3/ktQIz9AlqREGXZIaYdAlqREGXZIaYdAlqREGXZIaYdAlqRH/D1owNTaz3MJhAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of the '已批准贷款总额' column, drawn as the first step of the outlier-handling section.\n",
    "plt.boxplot(x=data['已批准贷款总额'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "168015.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# IQR rule: a value is an outlier when it is below Q1 - 1.5*IQR or above Q3 + 1.5*IQR.\n",
    "# Only the upper bound (high_whisker) is computed in this cell.\n",
    "Q1, Q3 = np.percentile(data['已批准贷款总额'], [25, 75])\n",
    "IQR = Q3 - Q1\n",
    "outlier_step = IQR * 1.5\n",
    "high_whisker = outlier_step + Q3\n",
    "high_whisker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>地区</th>\n",
       "      <th>对接员工编号</th>\n",
       "      <th>是否填写手机号</th>\n",
       "      <th>受否填写身份证</th>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <th>是否填写护照</th>\n",
       "      <th>信用评分</th>\n",
       "      <th>主账户贷款次数</th>\n",
       "      <th>主账户有效贷款次数</th>\n",
       "      <th>主账户中尚未还清有效贷款</th>\n",
       "      <th>主账户中已批准的贷款</th>\n",
       "      <th>主账户中已发放贷款</th>\n",
       "      <th>次账户贷款次数</th>\n",
       "      <th>次账户有效贷款次数</th>\n",
       "      <th>次账户中尚未还清有效贷款</th>\n",
       "      <th>次账户中已批准贷款</th>\n",
       "      <th>次账户中已发放贷款</th>\n",
       "      <th>主账户每月还款</th>\n",
       "      <th>次账户没用还款</th>\n",
       "      <th>近六个月新贷款次数</th>\n",
       "      <th>近六个月违约次数</th>\n",
       "      <th>平均贷款期限</th>\n",
       "      <th>第一次贷款距今时间</th>\n",
       "      <th>贷款查询次数</th>\n",
       "      <th>是否违约</th>\n",
       "      <th>贷款与资产比</th>\n",
       "      <th>贷款总次数</th>\n",
       "      <th>主账户无效贷款次数</th>\n",
       "      <th>次账户无效贷款次数</th>\n",
       "      <th>无效贷款总次数</th>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>601758</td>\n",
       "      <td>65532</td>\n",
       "      <td>78990</td>\n",
       "      <td>84.38</td>\n",
       "      <td>136</td>\n",
       "      <td>20490</td>\n",
       "      <td>45</td>\n",
       "      <td>1981</td>\n",
       "      <td>2018</td>\n",
       "      <td>8</td>\n",
       "      <td>2801</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.829624</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>447579</td>\n",
       "      <td>58413</td>\n",
       "      <td>67960</td>\n",
       "      <td>89.02</td>\n",
       "      <td>5</td>\n",
       "      <td>15663</td>\n",
       "      <td>86</td>\n",
       "      <td>1977</td>\n",
       "      <td>2018</td>\n",
       "      <td>9</td>\n",
       "      <td>1032</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.859520</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>648134</td>\n",
       "      <td>72317</td>\n",
       "      <td>99750</td>\n",
       "      <td>73.68</td>\n",
       "      <td>76</td>\n",
       "      <td>17242</td>\n",
       "      <td>48</td>\n",
       "      <td>1995</td>\n",
       "      <td>2018</td>\n",
       "      <td>8</td>\n",
       "      <td>220</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>763</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>13813</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>25</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.724982</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13813</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>13814.00</td>\n",
       "      <td>13813</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>616513</td>\n",
       "      <td>63882</td>\n",
       "      <td>79605</td>\n",
       "      <td>82.91</td>\n",
       "      <td>152</td>\n",
       "      <td>14470</td>\n",
       "      <td>51</td>\n",
       "      <td>1993</td>\n",
       "      <td>2018</td>\n",
       "      <td>3</td>\n",
       "      <td>912</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>749</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>16225</td>\n",
       "      <td>17700</td>\n",
       "      <td>17700</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1475</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>24</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.802487</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>16225</td>\n",
       "      <td>17700</td>\n",
       "      <td>17700</td>\n",
       "      <td>1475</td>\n",
       "      <td>1.09</td>\n",
       "      <td>11</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.50</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>453368</td>\n",
       "      <td>54013</td>\n",
       "      <td>62371</td>\n",
       "      <td>89.79</td>\n",
       "      <td>34</td>\n",
       "      <td>16556</td>\n",
       "      <td>86</td>\n",
       "      <td>1971</td>\n",
       "      <td>2018</td>\n",
       "      <td>6</td>\n",
       "      <td>1885</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>300</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>12991</td>\n",
       "      <td>100000</td>\n",
       "      <td>100000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3207</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>30</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.865995</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>12991</td>\n",
       "      <td>100000</td>\n",
       "      <td>100000</td>\n",
       "      <td>3207</td>\n",
       "      <td>7.70</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199709</th>\n",
       "      <td>470368</td>\n",
       "      <td>52199</td>\n",
       "      <td>63387</td>\n",
       "      <td>88.35</td>\n",
       "      <td>101</td>\n",
       "      <td>24379</td>\n",
       "      <td>86</td>\n",
       "      <td>1985</td>\n",
       "      <td>2018</td>\n",
       "      <td>15</td>\n",
       "      <td>2957</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>300</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>72033</td>\n",
       "      <td>75000</td>\n",
       "      <td>80288</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5354</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>49</td>\n",
       "      <td>18</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.823497</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>72033</td>\n",
       "      <td>75000</td>\n",
       "      <td>80288</td>\n",
       "      <td>5354</td>\n",
       "      <td>1.11</td>\n",
       "      <td>14</td>\n",
       "      <td>0</td>\n",
       "      <td>1.07</td>\n",
       "      <td>1.75</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199710</th>\n",
       "      <td>427853</td>\n",
       "      <td>65489</td>\n",
       "      <td>113590</td>\n",
       "      <td>59.86</td>\n",
       "      <td>5</td>\n",
       "      <td>16817</td>\n",
       "      <td>48</td>\n",
       "      <td>1995</td>\n",
       "      <td>2018</td>\n",
       "      <td>9</td>\n",
       "      <td>2636</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.576538</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199713</th>\n",
       "      <td>598007</td>\n",
       "      <td>52303</td>\n",
       "      <td>72677</td>\n",
       "      <td>72.93</td>\n",
       "      <td>34</td>\n",
       "      <td>15142</td>\n",
       "      <td>86</td>\n",
       "      <td>1985</td>\n",
       "      <td>2018</td>\n",
       "      <td>6</td>\n",
       "      <td>1641</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.719664</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199715</th>\n",
       "      <td>634336</td>\n",
       "      <td>54509</td>\n",
       "      <td>71921</td>\n",
       "      <td>77.86</td>\n",
       "      <td>74</td>\n",
       "      <td>16846</td>\n",
       "      <td>45</td>\n",
       "      <td>1983</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>306</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.757901</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199716</th>\n",
       "      <td>638308</td>\n",
       "      <td>63147</td>\n",
       "      <td>72000</td>\n",
       "      <td>89.58</td>\n",
       "      <td>2</td>\n",
       "      <td>23169</td>\n",
       "      <td>45</td>\n",
       "      <td>1970</td>\n",
       "      <td>2018</td>\n",
       "      <td>4</td>\n",
       "      <td>57</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>708</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>97963</td>\n",
       "      <td>106508</td>\n",
       "      <td>106508</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.877042</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>97963</td>\n",
       "      <td>106508</td>\n",
       "      <td>106508</td>\n",
       "      <td>0</td>\n",
       "      <td>1.09</td>\n",
       "      <td>106508</td>\n",
       "      <td>0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>2.00</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>165467 rows × 49 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          客户编号   已发货款    资产成本  贷款与资产比列   品牌  骑车销售商  车厂  出生日期  货款日期  地区  \\\n",
       "0       601758  65532   78990    84.38  136  20490  45  1981  2018   8   \n",
       "2       447579  58413   67960    89.02    5  15663  86  1977  2018   9   \n",
       "3       648134  72317   99750    73.68   76  17242  48  1995  2018   8   \n",
       "5       616513  63882   79605    82.91  152  14470  51  1993  2018   3   \n",
       "6       453368  54013   62371    89.79   34  16556  86  1971  2018   6   \n",
       "...        ...    ...     ...      ...  ...    ...  ..   ...   ...  ..   \n",
       "199709  470368  52199   63387    88.35  101  24379  86  1985  2018  15   \n",
       "199710  427853  65489  113590    59.86    5  16817  48  1995  2018   9   \n",
       "199713  598007  52303   72677    72.93   34  15142  86  1985  2018   6   \n",
       "199715  634336  54509   71921    77.86   74  16846  45  1983  2018   4   \n",
       "199716  638308  63147   72000    89.58    2  23169  45  1970  2018   4   \n",
       "\n",
       "        对接员工编号  是否填写手机号  受否填写身份证  是否出具驾驶证  是否填写护照  信用评分  主账户贷款次数  主账户有效贷款次数  \\\n",
       "0         2801        1        1        0       0     0        0          0   \n",
       "2         1032        1        1        0       0     0        0          0   \n",
       "3          220        1        1        0       0   763        1          1   \n",
       "5          912        1        1        0       0   749        2          1   \n",
       "6         1885        1        1        0       0   300        3          1   \n",
       "...        ...      ...      ...      ...     ...   ...      ...        ...   \n",
       "199709    2957        1        1        0       0   300        6          3   \n",
       "199710    2636        1        1        0       0     0        0          0   \n",
       "199713    1641        1        1        0       0     0        0          0   \n",
       "199715     306        1        1        0       0     0        0          0   \n",
       "199716      57        1        1        0       0   708        3          2   \n",
       "\n",
       "        主账户中尚未还清有效贷款  主账户中已批准的贷款  主账户中已发放贷款  次账户贷款次数  次账户有效贷款次数  次账户中尚未还清有效贷款  \\\n",
       "0                  0           0          0        0          0             0   \n",
       "2                  0           0          0        0          0             0   \n",
       "3                  0       13813      13813        0          0             0   \n",
       "5              16225       17700      17700        0          0             0   \n",
       "6              12991      100000     100000        0          0             0   \n",
       "...              ...         ...        ...      ...        ...           ...   \n",
       "199709         72033       75000      80288        0          0             0   \n",
       "199710             0           0          0        0          0             0   \n",
       "199713             0           0          0        0          0             0   \n",
       "199715             0           0          0        0          0             0   \n",
       "199716         97963      106508     106508        0          0             0   \n",
       "\n",
       "        次账户中已批准贷款  次账户中已发放贷款  主账户每月还款  次账户没用还款  近六个月新贷款次数  近六个月违约次数  平均贷款期限  \\\n",
       "0               0          0        0        0          0         0       0   \n",
       "2               0          0        0        0          0         0       0   \n",
       "3               0          0        0        0          0         0      25   \n",
       "5               0          0     1475        0          1         0      13   \n",
       "6               0          0     3207        0          0         0      17   \n",
       "...           ...        ...      ...      ...        ...       ...     ...   \n",
       "199709          0          0     5354        0          1         1      49   \n",
       "199710          0          0        0        0          0         0       0   \n",
       "199713          0          0        0        0          0         0       0   \n",
       "199715          0          0        0        0          0         0       0   \n",
       "199716          0          0        0        0          1         0       6   \n",
       "\n",
       "        第一次贷款距今时间  贷款查询次数  是否违约    贷款与资产比  贷款总次数  主账户无效贷款次数  次账户无效贷款次数  \\\n",
       "0               0       0     1  0.829624      0          0          0   \n",
       "2               0       0     1  0.859520      0          0          0   \n",
       "3              25       0     1  0.724982      1          0          0   \n",
       "5              24       0     1  0.802487      2          1          0   \n",
       "6              30       0     1  0.865995      3          2          0   \n",
       "...           ...     ...   ...       ...    ...        ...        ...   \n",
       "199709         18       1     0  0.823497      6          3          0   \n",
       "199710          0       0     0  0.576538      0          0          0   \n",
       "199713          0       0     0  0.719664      0          0          0   \n",
       "199715          0       0     0  0.757901      0          0          0   \n",
       "199716          1       0     0  0.877042      3          1          0   \n",
       "\n",
       "        无效贷款总次数  尚未还清有效贷款总额  已批准贷款总额  已发放贷款总额  每月还款总额  贷款与已还贷款比列  主账户还款期数  \\\n",
       "0             0           0        0        0       0       1.00        0   \n",
       "2             0           0        0        0       0       1.00        0   \n",
       "3             0           0    13813    13813       0   13814.00    13813   \n",
       "5             1       16225    17700    17700    1475       1.09       11   \n",
       "6             2       12991   100000   100000    3207       7.70       31   \n",
       "...         ...         ...      ...      ...     ...        ...      ...   \n",
       "199709        3       72033    75000    80288    5354       1.11       14   \n",
       "199710        0           0        0        0       0       1.00        0   \n",
       "199713        0           0        0        0       0       1.00        0   \n",
       "199715        0           0        0        0       0       1.00        0   \n",
       "199716        1       97963   106508   106508       0       1.09   106508   \n",
       "\n",
       "        次账户还款期数  贷款与已批准贷款比列  总贷款次数与总有效贷款次数比  工作类型  \n",
       "0             0        1.00            1.00     0  \n",
       "2             0        1.00            1.00     1  \n",
       "3             0        1.00            2.00     0  \n",
       "5             0        1.00            1.50     0  \n",
       "6             0        1.00            1.33     1  \n",
       "...         ...         ...             ...   ...  \n",
       "199709        0        1.07            1.75     0  \n",
       "199710        0        1.00            1.00     1  \n",
       "199713        0        1.00            1.00     0  \n",
       "199715        0        1.00            1.00     1  \n",
       "199716        0        1.00            2.00     2  \n",
       "\n",
       "[165467 rows x 49 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data=data[data['已批准贷款总额'] <high_whisker] \n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 缺失值处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "客户编号              0\n",
       "已发货款              0\n",
       "资产成本              0\n",
       "贷款与资产比列           0\n",
       "品牌                0\n",
       "骑车销售商             0\n",
       "车厂                0\n",
       "出生日期              0\n",
       "货款日期              0\n",
       "地区                0\n",
       "对接员工编号            0\n",
       "是否填写手机号           0\n",
       "受否填写身份证           0\n",
       "是否出具驾驶证           0\n",
       "是否填写护照            0\n",
       "信用评分              0\n",
       "主账户贷款次数           0\n",
       "主账户有效贷款次数         0\n",
       "主账户中尚未还清有效贷款      0\n",
       "主账户中已批准的贷款        0\n",
       "主账户中已发放贷款         0\n",
       "次账户贷款次数           0\n",
       "次账户有效贷款次数         0\n",
       "次账户中尚未还清有效贷款      0\n",
       "次账户中已批准贷款         0\n",
       "次账户中已发放贷款         0\n",
       "主账户每月还款           0\n",
       "次账户没用还款           0\n",
       "近六个月新贷款次数         0\n",
       "近六个月违约次数          0\n",
       "平均贷款期限            0\n",
       "第一次贷款距今时间         0\n",
       "贷款查询次数            0\n",
       "是否违约              0\n",
       "贷款与资产比            0\n",
       "贷款总次数             0\n",
       "主账户无效贷款次数         0\n",
       "次账户无效贷款次数         0\n",
       "无效贷款总次数           0\n",
       "尚未还清有效贷款总额        0\n",
       "已批准贷款总额           0\n",
       "已发放贷款总额           0\n",
       "每月还款总额            0\n",
       "贷款与已还贷款比列         0\n",
       "主账户还款期数           0\n",
       "次账户还款期数           0\n",
       "贷款与已批准贷款比列        0\n",
       "总贷款次数与总有效贷款次数比    0\n",
       "工作类型              0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.isna().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "无缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\ASUS\\Anaconda3\\envs\\py36\\lib\\site-packages\\pandas\\core\\frame.py:4278: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  method=method,\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "客户编号               0\n",
       "已发货款               0\n",
       "资产成本               0\n",
       "贷款与资产比列            0\n",
       "品牌                 0\n",
       "骑车销售商              0\n",
       "车厂                 0\n",
       "出生日期               0\n",
       "货款日期               0\n",
       "地区                 0\n",
       "对接员工编号             0\n",
       "是否填写手机号            0\n",
       "受否填写身份证            0\n",
       "是否出具驾驶证            0\n",
       "是否填写护照             0\n",
       "信用评分               0\n",
       "主账户贷款次数            0\n",
       "主账户有效贷款次数          0\n",
       "主账户中尚未还清有效贷款       0\n",
       "主账户中已批准的贷款         0\n",
       "主账户中已发放贷款          0\n",
       "次账户贷款次数            0\n",
       "次账户有效贷款次数          0\n",
       "次账户中尚未还清有效贷款       0\n",
       "次账户中已批准贷款          0\n",
       "次账户中已发放贷款          0\n",
       "主账户每月还款            0\n",
       "次账户没用还款            0\n",
       "近六个月新贷款次数          0\n",
       "近六个月违约次数           0\n",
       "平均贷款期限             0\n",
       "第一次贷款距今时间          0\n",
       "贷款查询次数             0\n",
       "是否违约               0\n",
       "贷款与资产比             0\n",
       "贷款总次数              0\n",
       "主账户无效贷款次数          0\n",
       "次账户无效贷款次数          0\n",
       "无效贷款总次数            0\n",
       "尚未还清有效贷款总额         0\n",
       "已批准贷款总额            0\n",
       "已发放贷款总额            0\n",
       "每月还款总额             0\n",
       "贷款与已还贷款比列         19\n",
       "主账户还款期数            0\n",
       "次账户还款期数            0\n",
       "贷款与已批准贷款比列         0\n",
       "总贷款次数与总有效贷款次数比     0\n",
       "工作类型               0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#将贷款与已还款比例这一列的inf值替换为nan\n",
    "data.replace(np.inf,np.nan,inplace=True)\n",
    "data.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\ASUS\\Anaconda3\\envs\\py36\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "客户编号              0\n",
       "已发货款              0\n",
       "资产成本              0\n",
       "贷款与资产比列           0\n",
       "品牌                0\n",
       "骑车销售商             0\n",
       "车厂                0\n",
       "出生日期              0\n",
       "货款日期              0\n",
       "地区                0\n",
       "对接员工编号            0\n",
       "是否填写手机号           0\n",
       "受否填写身份证           0\n",
       "是否出具驾驶证           0\n",
       "是否填写护照            0\n",
       "信用评分              0\n",
       "主账户贷款次数           0\n",
       "主账户有效贷款次数         0\n",
       "主账户中尚未还清有效贷款      0\n",
       "主账户中已批准的贷款        0\n",
       "主账户中已发放贷款         0\n",
       "次账户贷款次数           0\n",
       "次账户有效贷款次数         0\n",
       "次账户中尚未还清有效贷款      0\n",
       "次账户中已批准贷款         0\n",
       "次账户中已发放贷款         0\n",
       "主账户每月还款           0\n",
       "次账户没用还款           0\n",
       "近六个月新贷款次数         0\n",
       "近六个月违约次数          0\n",
       "平均贷款期限            0\n",
       "第一次贷款距今时间         0\n",
       "贷款查询次数            0\n",
       "是否违约              0\n",
       "贷款与资产比            0\n",
       "贷款总次数             0\n",
       "主账户无效贷款次数         0\n",
       "次账户无效贷款次数         0\n",
       "无效贷款总次数           0\n",
       "尚未还清有效贷款总额        0\n",
       "已批准贷款总额           0\n",
       "已发放贷款总额           0\n",
       "每月还款总额            0\n",
       "贷款与已还贷款比列         0\n",
       "主账户还款期数           0\n",
       "次账户还款期数           0\n",
       "贷款与已批准贷款比列        0\n",
       "总贷款次数与总有效贷款次数比    0\n",
       "工作类型              0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#删除nan 并且查看\n",
    "data.dropna(inplace=True)\n",
    "data.isna().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 下采样"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    134723\n",
       "1     30725\n",
       "Name: 是否违约, dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.是否违约.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['客户编号',\n",
       " '已发货款',\n",
       " '资产成本',\n",
       " '贷款与资产比列',\n",
       " '品牌',\n",
       " '骑车销售商',\n",
       " '车厂',\n",
       " '出生日期',\n",
       " '货款日期',\n",
       " '地区',\n",
       " '对接员工编号',\n",
       " '是否填写手机号',\n",
       " '受否填写身份证',\n",
       " '是否出具驾驶证',\n",
       " '是否填写护照',\n",
       " '信用评分',\n",
       " '主账户贷款次数',\n",
       " '主账户有效贷款次数',\n",
       " '主账户中尚未还清有效贷款',\n",
       " '主账户中已批准的贷款',\n",
       " '主账户中已发放贷款',\n",
       " '次账户贷款次数',\n",
       " '次账户有效贷款次数',\n",
       " '次账户中尚未还清有效贷款',\n",
       " '次账户中已批准贷款',\n",
       " '次账户中已发放贷款',\n",
       " '主账户每月还款',\n",
       " '次账户没用还款',\n",
       " '近六个月新贷款次数',\n",
       " '近六个月违约次数',\n",
       " '平均贷款期限',\n",
       " '第一次贷款距今时间',\n",
       " '贷款查询次数',\n",
       " '贷款与资产比',\n",
       " '贷款总次数',\n",
       " '主账户无效贷款次数',\n",
       " '次账户无效贷款次数',\n",
       " '无效贷款总次数',\n",
       " '尚未还清有效贷款总额',\n",
       " '已批准贷款总额',\n",
       " '已发放贷款总额',\n",
       " '每月还款总额',\n",
       " '贷款与已还贷款比列',\n",
       " '主账户还款期数',\n",
       " '次账户还款期数',\n",
       " '贷款与已批准贷款比列',\n",
       " '总贷款次数与总有效贷款次数比',\n",
       " '工作类型']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features=[i for i in data.columns if i not in ['是否违约']]\n",
    "features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from imblearn.under_sampling  import NearMiss\n",
    "nm =NearMiss(version=1)\n",
    "x_resampled, y_resampled = nm.fit_sample(data[features],data['是否违约'])\n",
    "df =pd.concat([pd.DataFrame(x_resampled),pd.Series(y_resampled)],axis=1)\n",
    "df.columns=[*features,'是否违约']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>地区</th>\n",
       "      <th>对接员工编号</th>\n",
       "      <th>是否填写手机号</th>\n",
       "      <th>受否填写身份证</th>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <th>是否填写护照</th>\n",
       "      <th>信用评分</th>\n",
       "      <th>主账户贷款次数</th>\n",
       "      <th>主账户有效贷款次数</th>\n",
       "      <th>主账户中尚未还清有效贷款</th>\n",
       "      <th>主账户中已批准的贷款</th>\n",
       "      <th>主账户中已发放贷款</th>\n",
       "      <th>次账户贷款次数</th>\n",
       "      <th>次账户有效贷款次数</th>\n",
       "      <th>次账户中尚未还清有效贷款</th>\n",
       "      <th>次账户中已批准贷款</th>\n",
       "      <th>次账户中已发放贷款</th>\n",
       "      <th>主账户每月还款</th>\n",
       "      <th>次账户没用还款</th>\n",
       "      <th>近六个月新贷款次数</th>\n",
       "      <th>近六个月违约次数</th>\n",
       "      <th>平均贷款期限</th>\n",
       "      <th>第一次贷款距今时间</th>\n",
       "      <th>贷款查询次数</th>\n",
       "      <th>贷款与资产比</th>\n",
       "      <th>贷款总次数</th>\n",
       "      <th>主账户无效贷款次数</th>\n",
       "      <th>次账户无效贷款次数</th>\n",
       "      <th>无效贷款总次数</th>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "      <th>是否违约</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>444422.0</td>\n",
       "      <td>30341.0</td>\n",
       "      <td>42940.0</td>\n",
       "      <td>74.52</td>\n",
       "      <td>105.0</td>\n",
       "      <td>20292.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1992.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1959.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.706591</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>444630.0</td>\n",
       "      <td>43617.0</td>\n",
       "      <td>58075.0</td>\n",
       "      <td>79.90</td>\n",
       "      <td>105.0</td>\n",
       "      <td>20292.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1989.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2309.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.751046</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>486419.0</td>\n",
       "      <td>56259.0</td>\n",
       "      <td>68156.0</td>\n",
       "      <td>83.63</td>\n",
       "      <td>146.0</td>\n",
       "      <td>15694.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>1996.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>594.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.825445</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>498555.0</td>\n",
       "      <td>55013.0</td>\n",
       "      <td>67340.0</td>\n",
       "      <td>84.65</td>\n",
       "      <td>251.0</td>\n",
       "      <td>22994.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>1993.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>2546.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.816944</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>485365.0</td>\n",
       "      <td>43617.0</td>\n",
       "      <td>58075.0</td>\n",
       "      <td>79.90</td>\n",
       "      <td>105.0</td>\n",
       "      <td>20292.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1978.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1959.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.751046</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       客户编号     已发货款     资产成本  贷款与资产比列     品牌    骑车销售商    车厂    出生日期    货款日期  \\\n",
       "0  444422.0  30341.0  42940.0    74.52  105.0  20292.0  48.0  1992.0  2018.0   \n",
       "1  444630.0  43617.0  58075.0    79.90  105.0  20292.0  48.0  1989.0  2018.0   \n",
       "2  486419.0  56259.0  68156.0    83.63  146.0  15694.0  86.0  1996.0  2018.0   \n",
       "3  498555.0  55013.0  67340.0    84.65  251.0  22994.0  45.0  1993.0  2018.0   \n",
       "4  485365.0  43617.0  58075.0    79.90  105.0  20292.0  48.0  1978.0  2018.0   \n",
       "\n",
       "     地区  对接员工编号  是否填写手机号  受否填写身份证  是否出具驾驶证  是否填写护照  信用评分  主账户贷款次数  主账户有效贷款次数  \\\n",
       "0   6.0  1959.0      1.0      1.0      0.0     0.0   0.0      0.0        0.0   \n",
       "1   6.0  2309.0      1.0      1.0      0.0     0.0   0.0      0.0        0.0   \n",
       "2  14.0   594.0      1.0      1.0      0.0     0.0   0.0      0.0        0.0   \n",
       "3  13.0  2546.0      1.0      1.0      0.0     0.0   0.0      0.0        0.0   \n",
       "4   6.0  1959.0      1.0      1.0      0.0     0.0   0.0      0.0        0.0   \n",
       "\n",
       "   主账户中尚未还清有效贷款  主账户中已批准的贷款  主账户中已发放贷款  次账户贷款次数  次账户有效贷款次数  次账户中尚未还清有效贷款  \\\n",
       "0           0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "1           0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "2           0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "3           0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "4           0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "\n",
       "   次账户中已批准贷款  次账户中已发放贷款  主账户每月还款  次账户没用还款  近六个月新贷款次数  近六个月违约次数  平均贷款期限  \\\n",
       "0        0.0        0.0      0.0      0.0        0.0       0.0     0.0   \n",
       "1        0.0        0.0      0.0      0.0        0.0       0.0     0.0   \n",
       "2        0.0        0.0      0.0      0.0        0.0       0.0     0.0   \n",
       "3        0.0        0.0      0.0      0.0        0.0       0.0     0.0   \n",
       "4        0.0        0.0      0.0      0.0        0.0       0.0     0.0   \n",
       "\n",
       "   第一次贷款距今时间  贷款查询次数    贷款与资产比  贷款总次数  主账户无效贷款次数  次账户无效贷款次数  无效贷款总次数  \\\n",
       "0        0.0     0.0  0.706591    0.0        0.0        0.0      0.0   \n",
       "1        0.0     0.0  0.751046    0.0        0.0        0.0      0.0   \n",
       "2        0.0     0.0  0.825445    0.0        0.0        0.0      0.0   \n",
       "3        0.0     0.0  0.816944    0.0        0.0        0.0      0.0   \n",
       "4        0.0     0.0  0.751046    0.0        0.0        0.0      0.0   \n",
       "\n",
       "   尚未还清有效贷款总额  已批准贷款总额  已发放贷款总额  每月还款总额  贷款与已还贷款比列  主账户还款期数  次账户还款期数  \\\n",
       "0         0.0      0.0      0.0     0.0        1.0      0.0      0.0   \n",
       "1         0.0      0.0      0.0     0.0        1.0      0.0      0.0   \n",
       "2         0.0      0.0      0.0     0.0        1.0      0.0      0.0   \n",
       "3         0.0      0.0      0.0     0.0        1.0      0.0      0.0   \n",
       "4         0.0      0.0      0.0     0.0        1.0      0.0      0.0   \n",
       "\n",
       "   贷款与已批准贷款比列  总贷款次数与总有效贷款次数比  工作类型  是否违约  \n",
       "0         1.0             1.0   1.0     0  \n",
       "1         1.0             1.0   1.0     0  \n",
       "2         1.0             1.0   1.0     0  \n",
       "3         1.0             1.0   1.0     0  \n",
       "4         1.0             1.0   1.0     0  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 特征构造"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        26.0\n",
       "1        29.0\n",
       "2        22.0\n",
       "3        25.0\n",
       "4        40.0\n",
       "         ... \n",
       "61445    45.0\n",
       "61446    33.0\n",
       "61447    30.0\n",
       "61448    21.0\n",
       "61449    36.0\n",
       "Name: 年龄, Length: 61450, dtype: float64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_copy=df.copy()\n",
    "df_copy['年龄']=df_copy['货款日期']-df_copy['出生日期']\n",
    "df_copy['年龄'] \n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1=df_copy.groupby('品牌')['客户编号'].count()\n",
    "df1=pd.DataFrame(df1).rename(columns={'客户编号':'品牌被贷款数量'})\n",
    "df2=df_copy.groupby('品牌')['已发货款'].mean()\n",
    "df2=pd.DataFrame(df2).rename(columns={'已发货款':'品牌平均已发货款'}).round(0)\n",
    "df3=df_copy.groupby('品牌')[['主账户有效贷款次数','主账户中尚未还清有效贷款','主账户每月还款','平均贷款期限','每月还款总额']].mean()\n",
    "df3=pd.DataFrame(df3).rename(columns={'主账户有效贷款次数':'平均主账户有效贷款次数',\n",
    "                                      '主账户中尚未还清有效贷款':'平均主账户中尚未还清有效贷款',\n",
    "                                      '主账户每月还款':'平均主账户每月还款',\n",
    "                                       '每月还款总额':'平均每月还款总额' }).round(0)\n",
    "df_0=pd.concat([df1,df2,df3],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>品牌被贷款数量</th>\n",
       "      <th>品牌平均已发货款</th>\n",
       "      <th>平均主账户有效贷款次数</th>\n",
       "      <th>平均主账户中尚未还清有效贷款</th>\n",
       "      <th>平均主账户每月还款</th>\n",
       "      <th>平均贷款期限</th>\n",
       "      <th>平均每月还款总额</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>品牌</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>1213</td>\n",
       "      <td>58401.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3765.0</td>\n",
       "      <td>489.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>491.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2.0</th>\n",
       "      <td>3384</td>\n",
       "      <td>60176.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3992.0</td>\n",
       "      <td>620.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>622.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3.0</th>\n",
       "      <td>1855</td>\n",
       "      <td>50639.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5377.0</td>\n",
       "      <td>978.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1018.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5.0</th>\n",
       "      <td>2503</td>\n",
       "      <td>56131.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6709.0</td>\n",
       "      <td>2371.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2408.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7.0</th>\n",
       "      <td>724</td>\n",
       "      <td>57688.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4462.0</td>\n",
       "      <td>1087.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1087.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>257.0</th>\n",
       "      <td>353</td>\n",
       "      <td>50784.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4164.0</td>\n",
       "      <td>262.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>262.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>258.0</th>\n",
       "      <td>96</td>\n",
       "      <td>54917.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3135.0</td>\n",
       "      <td>190.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>190.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259.0</th>\n",
       "      <td>93</td>\n",
       "      <td>48885.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4191.0</td>\n",
       "      <td>453.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>453.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>260.0</th>\n",
       "      <td>63</td>\n",
       "      <td>59670.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>13018.0</td>\n",
       "      <td>32321.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>32592.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>261.0</th>\n",
       "      <td>29</td>\n",
       "      <td>56684.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>11350.0</td>\n",
       "      <td>727.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>727.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>82 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       品牌被贷款数量  品牌平均已发货款  平均主账户有效贷款次数  平均主账户中尚未还清有效贷款  平均主账户每月还款  平均贷款期限  \\\n",
       "品牌                                                                         \n",
       "1.0       1213   58401.0          0.0          3765.0      489.0     4.0   \n",
       "2.0       3384   60176.0          0.0          3992.0      620.0     3.0   \n",
       "3.0       1855   50639.0          0.0          5377.0      978.0     4.0   \n",
       "5.0       2503   56131.0          0.0          6709.0     2371.0     4.0   \n",
       "7.0        724   57688.0          0.0          4462.0     1087.0     4.0   \n",
       "...        ...       ...          ...             ...        ...     ...   \n",
       "257.0      353   50784.0          0.0          4164.0      262.0     3.0   \n",
       "258.0       96   54917.0          0.0          3135.0      190.0     3.0   \n",
       "259.0       93   48885.0          0.0          4191.0      453.0     5.0   \n",
       "260.0       63   59670.0          1.0         13018.0    32321.0     4.0   \n",
       "261.0       29   56684.0          1.0         11350.0      727.0     1.0   \n",
       "\n",
       "       平均每月还款总额  \n",
       "品牌               \n",
       "1.0       491.0  \n",
       "2.0       622.0  \n",
       "3.0      1018.0  \n",
       "5.0      2408.0  \n",
       "7.0      1087.0  \n",
       "...         ...  \n",
       "257.0     262.0  \n",
       "258.0     190.0  \n",
       "259.0     453.0  \n",
       "260.0   32592.0  \n",
       "261.0     727.0  \n",
       "\n",
       "[82 rows x 7 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_copy.drop(columns=['是否填写手机号','受否填写身份证'],axis=0,inplace=True)       "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_copy['剩余发放贷款总额']=pd.Series(df_copy['已批准贷款总额']-df_copy['已发放贷款总额']) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>已发货款</th>\n",
       "      <th>资产成本</th>\n",
       "      <th>贷款与资产比列</th>\n",
       "      <th>品牌</th>\n",
       "      <th>骑车销售商</th>\n",
       "      <th>车厂</th>\n",
       "      <th>出生日期</th>\n",
       "      <th>货款日期</th>\n",
       "      <th>地区</th>\n",
       "      <th>对接员工编号</th>\n",
       "      <th>是否出具驾驶证</th>\n",
       "      <th>是否填写护照</th>\n",
       "      <th>信用评分</th>\n",
       "      <th>主账户贷款次数</th>\n",
       "      <th>主账户有效贷款次数</th>\n",
       "      <th>主账户中尚未还清有效贷款</th>\n",
       "      <th>主账户中已批准的贷款</th>\n",
       "      <th>主账户中已发放贷款</th>\n",
       "      <th>次账户贷款次数</th>\n",
       "      <th>次账户有效贷款次数</th>\n",
       "      <th>次账户中尚未还清有效贷款</th>\n",
       "      <th>次账户中已批准贷款</th>\n",
       "      <th>次账户中已发放贷款</th>\n",
       "      <th>主账户每月还款</th>\n",
       "      <th>次账户没用还款</th>\n",
       "      <th>近六个月新贷款次数</th>\n",
       "      <th>近六个月违约次数</th>\n",
       "      <th>平均贷款期限_x</th>\n",
       "      <th>第一次贷款距今时间</th>\n",
       "      <th>贷款查询次数</th>\n",
       "      <th>贷款与资产比</th>\n",
       "      <th>贷款总次数</th>\n",
       "      <th>主账户无效贷款次数</th>\n",
       "      <th>次账户无效贷款次数</th>\n",
       "      <th>无效贷款总次数</th>\n",
       "      <th>尚未还清有效贷款总额</th>\n",
       "      <th>已批准贷款总额</th>\n",
       "      <th>已发放贷款总额</th>\n",
       "      <th>每月还款总额</th>\n",
       "      <th>贷款与已还贷款比列</th>\n",
       "      <th>主账户还款期数</th>\n",
       "      <th>次账户还款期数</th>\n",
       "      <th>贷款与已批准贷款比列</th>\n",
       "      <th>总贷款次数与总有效贷款次数比</th>\n",
       "      <th>工作类型</th>\n",
       "      <th>是否违约</th>\n",
       "      <th>年龄</th>\n",
       "      <th>剩余发放贷款总额</th>\n",
       "      <th>品牌被贷款数量</th>\n",
       "      <th>品牌平均已发货款</th>\n",
       "      <th>平均主账户有效贷款次数</th>\n",
       "      <th>平均主账户中尚未还清有效贷款</th>\n",
       "      <th>平均主账户每月还款</th>\n",
       "      <th>平均贷款期限_y</th>\n",
       "      <th>平均每月还款总额</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>444422.0</td>\n",
       "      <td>30341.0</td>\n",
       "      <td>42940.0</td>\n",
       "      <td>74.52</td>\n",
       "      <td>105.0</td>\n",
       "      <td>20292.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1992.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1959.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.706591</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>805</td>\n",
       "      <td>50531.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7385.0</td>\n",
       "      <td>1522.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1522.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>444630.0</td>\n",
       "      <td>43617.0</td>\n",
       "      <td>58075.0</td>\n",
       "      <td>79.90</td>\n",
       "      <td>105.0</td>\n",
       "      <td>20292.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1989.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2309.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.751046</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>805</td>\n",
       "      <td>50531.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7385.0</td>\n",
       "      <td>1522.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1522.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>485365.0</td>\n",
       "      <td>43617.0</td>\n",
       "      <td>58075.0</td>\n",
       "      <td>79.90</td>\n",
       "      <td>105.0</td>\n",
       "      <td>20292.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1978.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1959.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.751046</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>805</td>\n",
       "      <td>50531.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7385.0</td>\n",
       "      <td>1522.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1522.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>609649.0</td>\n",
       "      <td>52703.0</td>\n",
       "      <td>68930.0</td>\n",
       "      <td>79.79</td>\n",
       "      <td>105.0</td>\n",
       "      <td>20292.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1973.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2397.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.764587</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>805</td>\n",
       "      <td>50531.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7385.0</td>\n",
       "      <td>1522.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1522.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>608981.0</td>\n",
       "      <td>52478.0</td>\n",
       "      <td>68930.0</td>\n",
       "      <td>79.79</td>\n",
       "      <td>105.0</td>\n",
       "      <td>20292.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1983.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>2397.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.761323</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>35.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>805</td>\n",
       "      <td>50531.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7385.0</td>\n",
       "      <td>1522.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1522.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61445</th>\n",
       "      <td>591118.0</td>\n",
       "      <td>52213.0</td>\n",
       "      <td>69200.0</td>\n",
       "      <td>78.76</td>\n",
       "      <td>260.0</td>\n",
       "      <td>23961.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1984.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3069.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>721.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>47000.0</td>\n",
       "      <td>47000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.754523</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>47000.0</td>\n",
       "      <td>47000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>47001.00</td>\n",
       "      <td>47000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.50</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>63</td>\n",
       "      <td>59670.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>13018.0</td>\n",
       "      <td>32321.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>32592.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61446</th>\n",
       "      <td>474998.0</td>\n",
       "      <td>83269.0</td>\n",
       "      <td>126100.0</td>\n",
       "      <td>68.20</td>\n",
       "      <td>260.0</td>\n",
       "      <td>23961.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1992.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3069.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.660341</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>26.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>63</td>\n",
       "      <td>59670.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>13018.0</td>\n",
       "      <td>32321.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>32592.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61447</th>\n",
       "      <td>623946.0</td>\n",
       "      <td>59934.0</td>\n",
       "      <td>83744.0</td>\n",
       "      <td>74.83</td>\n",
       "      <td>260.0</td>\n",
       "      <td>24066.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1979.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>3723.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>624.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>78758.0</td>\n",
       "      <td>78000.0</td>\n",
       "      <td>78000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>78000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.715681</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>78758.0</td>\n",
       "      <td>78000.0</td>\n",
       "      <td>78000.0</td>\n",
       "      <td>78000.0</td>\n",
       "      <td>0.99</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.33</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>39.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>63</td>\n",
       "      <td>59670.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>13018.0</td>\n",
       "      <td>32321.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>32592.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61448</th>\n",
       "      <td>610472.0</td>\n",
       "      <td>84360.0</td>\n",
       "      <td>110661.0</td>\n",
       "      <td>79.52</td>\n",
       "      <td>260.0</td>\n",
       "      <td>24066.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>1982.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2797.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>738.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>26526.0</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10204.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.762328</td>\n",
       "      <td>9.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>26526.0</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>55000.0</td>\n",
       "      <td>10204.0</td>\n",
       "      <td>2.07</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>63</td>\n",
       "      <td>59670.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>13018.0</td>\n",
       "      <td>32321.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>32592.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61449</th>\n",
       "      <td>460731.0</td>\n",
       "      <td>53337.0</td>\n",
       "      <td>63634.0</td>\n",
       "      <td>84.91</td>\n",
       "      <td>260.0</td>\n",
       "      <td>24109.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>1997.0</td>\n",
       "      <td>2018.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2559.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.838184</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>21.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>63</td>\n",
       "      <td>59670.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>13018.0</td>\n",
       "      <td>32321.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>32592.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>61450 rows × 56 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "           客户编号     已发货款      资产成本  贷款与资产比列     品牌    骑车销售商    车厂    出生日期  \\\n",
       "0      444422.0  30341.0   42940.0    74.52  105.0  20292.0  48.0  1992.0   \n",
       "1      444630.0  43617.0   58075.0    79.90  105.0  20292.0  48.0  1989.0   \n",
       "2      485365.0  43617.0   58075.0    79.90  105.0  20292.0  48.0  1978.0   \n",
       "3      609649.0  52703.0   68930.0    79.79  105.0  20292.0  48.0  1973.0   \n",
       "4      608981.0  52478.0   68930.0    79.79  105.0  20292.0  48.0  1983.0   \n",
       "...         ...      ...       ...      ...    ...      ...   ...     ...   \n",
       "61445  591118.0  52213.0   69200.0    78.76  260.0  23961.0  48.0  1984.0   \n",
       "61446  474998.0  83269.0  126100.0    68.20  260.0  23961.0  48.0  1992.0   \n",
       "61447  623946.0  59934.0   83744.0    74.83  260.0  24066.0  48.0  1979.0   \n",
       "61448  610472.0  84360.0  110661.0    79.52  260.0  24066.0  48.0  1982.0   \n",
       "61449  460731.0  53337.0   63634.0    84.91  260.0  24109.0  45.0  1997.0   \n",
       "\n",
       "         货款日期   地区  对接员工编号  是否出具驾驶证  是否填写护照   信用评分  主账户贷款次数  主账户有效贷款次数  \\\n",
       "0      2018.0  6.0  1959.0      0.0     0.0    0.0      0.0        0.0   \n",
       "1      2018.0  6.0  2309.0      0.0     0.0    0.0      0.0        0.0   \n",
       "2      2018.0  6.0  1959.0      0.0     0.0    0.0      0.0        0.0   \n",
       "3      2018.0  6.0  2397.0      0.0     0.0    0.0      0.0        0.0   \n",
       "4      2018.0  6.0  2397.0      0.0     0.0    0.0      0.0        0.0   \n",
       "...       ...  ...     ...      ...     ...    ...      ...        ...   \n",
       "61445  2018.0  8.0  3069.0      0.0     0.0  721.0      2.0        1.0   \n",
       "61446  2018.0  8.0  3069.0      0.0     0.0    0.0      0.0        0.0   \n",
       "61447  2018.0  8.0  3723.0      0.0     0.0  624.0      3.0        1.0   \n",
       "61448  2018.0  8.0  2797.0      0.0     0.0  738.0      9.0        2.0   \n",
       "61449  2018.0  8.0  2559.0      0.0     0.0    0.0      0.0        0.0   \n",
       "\n",
       "       主账户中尚未还清有效贷款  主账户中已批准的贷款  主账户中已发放贷款  次账户贷款次数  次账户有效贷款次数  次账户中尚未还清有效贷款  \\\n",
       "0               0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "1               0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "2               0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "3               0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "4               0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "...             ...         ...        ...      ...        ...           ...   \n",
       "61445           0.0     47000.0    47000.0      0.0        0.0           0.0   \n",
       "61446           0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "61447       78758.0     78000.0    78000.0      0.0        0.0           0.0   \n",
       "61448       26526.0     55000.0    55000.0      0.0        0.0           0.0   \n",
       "61449           0.0         0.0        0.0      0.0        0.0           0.0   \n",
       "\n",
       "       次账户中已批准贷款  次账户中已发放贷款  主账户每月还款  次账户没用还款  近六个月新贷款次数  近六个月违约次数  平均贷款期限_x  \\\n",
       "0            0.0        0.0      0.0      0.0        0.0       0.0       0.0   \n",
       "1            0.0        0.0      0.0      0.0        0.0       0.0       0.0   \n",
       "2            0.0        0.0      0.0      0.0        0.0       0.0       0.0   \n",
       "3            0.0        0.0      0.0      0.0        0.0       0.0       0.0   \n",
       "4            0.0        0.0      0.0      0.0        0.0       0.0       0.0   \n",
       "...          ...        ...      ...      ...        ...       ...       ...   \n",
       "61445        0.0        0.0      0.0      0.0        0.0       0.0       1.0   \n",
       "61446        0.0        0.0      0.0      0.0        0.0       0.0       0.0   \n",
       "61447        0.0        0.0  78000.0      0.0        1.0       0.0       8.0   \n",
       "61448        0.0        0.0  10204.0      0.0        0.0       0.0      14.0   \n",
       "61449        0.0        0.0      0.0      0.0        0.0       0.0       0.0   \n",
       "\n",
       "       第一次贷款距今时间  贷款查询次数    贷款与资产比  贷款总次数  主账户无效贷款次数  次账户无效贷款次数  无效贷款总次数  \\\n",
       "0            0.0     0.0  0.706591    0.0        0.0        0.0      0.0   \n",
       "1            0.0     0.0  0.751046    0.0        0.0        0.0      0.0   \n",
       "2            0.0     0.0  0.751046    0.0        0.0        0.0      0.0   \n",
       "3            0.0     0.0  0.764587    0.0        0.0        0.0      0.0   \n",
       "4            0.0     0.0  0.761323    0.0        0.0        0.0      0.0   \n",
       "...          ...     ...       ...    ...        ...        ...      ...   \n",
       "61445       15.0     0.0  0.754523    2.0        1.0        0.0      1.0   \n",
       "61446        0.0     2.0  0.660341    0.0        0.0        0.0      0.0   \n",
       "61447       12.0     0.0  0.715681    3.0        2.0        0.0      2.0   \n",
       "61448       24.0     0.0  0.762328    9.0        7.0        0.0      7.0   \n",
       "61449        0.0     0.0  0.838184    0.0        0.0        0.0      0.0   \n",
       "\n",
       "       尚未还清有效贷款总额  已批准贷款总额  已发放贷款总额   每月还款总额  贷款与已还贷款比列  主账户还款期数  次账户还款期数  \\\n",
       "0             0.0      0.0      0.0      0.0       1.00      0.0      0.0   \n",
       "1             0.0      0.0      0.0      0.0       1.00      0.0      0.0   \n",
       "2             0.0      0.0      0.0      0.0       1.00      0.0      0.0   \n",
       "3             0.0      0.0      0.0      0.0       1.00      0.0      0.0   \n",
       "4             0.0      0.0      0.0      0.0       1.00      0.0      0.0   \n",
       "...           ...      ...      ...      ...        ...      ...      ...   \n",
       "61445         0.0  47000.0  47000.0      0.0   47001.00  47000.0      0.0   \n",
       "61446         0.0      0.0      0.0      0.0       1.00      0.0      0.0   \n",
       "61447     78758.0  78000.0  78000.0  78000.0       0.99      0.0      0.0   \n",
       "61448     26526.0  55000.0  55000.0  10204.0       2.07      5.0      0.0   \n",
       "61449         0.0      0.0      0.0      0.0       1.00      0.0      0.0   \n",
       "\n",
       "       贷款与已批准贷款比列  总贷款次数与总有效贷款次数比  工作类型  是否违约    年龄  剩余发放贷款总额  品牌被贷款数量  \\\n",
       "0             1.0            1.00   1.0     0  26.0       0.0      805   \n",
       "1             1.0            1.00   1.0     0  29.0       0.0      805   \n",
       "2             1.0            1.00   1.0     0  40.0       0.0      805   \n",
       "3             1.0            1.00   1.0     0  45.0       0.0      805   \n",
       "4             1.0            1.00   1.0     0  35.0       0.0      805   \n",
       "...           ...             ...   ...   ...   ...       ...      ...   \n",
       "61445         1.0            1.50   1.0     1  34.0       0.0       63   \n",
       "61446         1.0            1.00   1.0     1  26.0       0.0       63   \n",
       "61447         1.0            1.33   0.0     1  39.0       0.0       63   \n",
       "61448         1.0            1.25   0.0     1  36.0       0.0       63   \n",
       "61449         1.0            1.00   1.0     1  21.0       0.0       63   \n",
       "\n",
       "       品牌平均已发货款  平均主账户有效贷款次数  平均主账户中尚未还清有效贷款  平均主账户每月还款  平均贷款期限_y  平均每月还款总额  \n",
       "0       50531.0          0.0          7385.0     1522.0       4.0    1522.0  \n",
       "1       50531.0          0.0          7385.0     1522.0       4.0    1522.0  \n",
       "2       50531.0          0.0          7385.0     1522.0       4.0    1522.0  \n",
       "3       50531.0          0.0          7385.0     1522.0       4.0    1522.0  \n",
       "4       50531.0          0.0          7385.0     1522.0       4.0    1522.0  \n",
       "...         ...          ...             ...        ...       ...       ...  \n",
       "61445   59670.0          1.0         13018.0    32321.0       4.0   32592.0  \n",
       "61446   59670.0          1.0         13018.0    32321.0       4.0   32592.0  \n",
       "61447   59670.0          1.0         13018.0    32321.0       4.0   32592.0  \n",
       "61448   59670.0          1.0         13018.0    32321.0       4.0   32592.0  \n",
       "61449   59670.0          1.0         13018.0    32321.0       4.0   32592.0  \n",
       "\n",
       "[61450 rows x 56 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inner-join df_copy with df_0 on the brand column ('品牌'), then show the result.\n",
    "df_copy1 = pd.merge(left=df_copy, right=df_0, how='inner', on='品牌')\n",
    "df_copy1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "# 模型处理模块\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "# 标准化处理模块\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "# 常规模型\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "# 集成学习和stacking模型\n",
    "from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier\n",
    "import xgboost as xgb\n",
    "from xgboost.sklearn import XGBClassifier\n",
    "#from mlxtend.classifier import StackingClassifier\n",
    "# 评价标准模块\n",
    "from sklearn import metrics\n",
    "from sklearn.metrics import accuracy_score,roc_auc_score,recall_score,precision_score,f1_score\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every column except the two ID columns and the target label is a model input.\n",
    "non_feature_cols = {'客户编号', '对接员工编号', '是否违约'}\n",
    "features_ = [col for col in df_copy1.columns if col not in non_feature_cols]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    df_copy1[features_],\n",
    "    df_copy1['是否违约'],\n",
    "    test_size=0.2,\n",
    "    random_state=2021,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_model(X_train, y_train, X_test, y_test,\n",
    "                model, model_name):\n",
    "    \"\"\"Fit ``model`` on the training split and print/return its test metrics.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X_train, y_train : training features and labels; ``y_train`` must expose\n",
    "        ``.values`` (it is flattened with ``.values.ravel()`` before fitting).\n",
    "    X_test, y_test : held-out features and labels used for the test metrics.\n",
    "    model : estimator providing ``fit``, ``predict`` and ``score``.\n",
    "    model_name : label shown in the printed report.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        ``(clf, score, precision, recall, duration)``: the fitted estimator,\n",
    "        ``clf.score`` on the test split, test precision, test recall and the\n",
    "        seconds spent inside ``fit``. The F1 score is printed but not returned.\n",
    "    \"\"\"\n",
    "    print('训练{}'.format(model_name))\n",
    "\n",
    "    clf = model\n",
    "\n",
    "    # Time only the fit call so the reported figure is the training cost,\n",
    "    # not training plus scoring, prediction and metric computation.\n",
    "    start = time.perf_counter()\n",
    "    clf.fit(X_train, y_train.values.ravel())\n",
    "    duration = time.perf_counter() - start\n",
    "\n",
    "    # Validate the model on the data it was trained on\n",
    "    print('训练准确率：{:.4f}'.format(clf.score(X_train, y_train)))\n",
    "\n",
    "    # Evaluate on the held-out split\n",
    "    predict = clf.predict(X_test)\n",
    "    score = clf.score(X_test, y_test)\n",
    "    precision = precision_score(y_test, predict)\n",
    "    recall = recall_score(y_test, predict)\n",
    "    f1 = f1_score(y_test, predict)\n",
    "    print('测试准确率：{:.4f}'.format(score))\n",
    "    print('测试精确率：{:.4f}'.format(precision))\n",
    "    print('测试召回率：{:.4f}'.format(recall))\n",
    "    print('测试f1_score：{:.4f}'.format(f1))\n",
    "\n",
    "    print('模型训练耗时：{:.6f}s'.format(duration))\n",
    "    print('*' * 50)\n",
    "\n",
    "    return clf, score, precision, recall, duration"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 多种模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "训练LR\n",
      "训练准确率：0.7354\n",
      "测试准确率：0.7352\n",
      "测试精确率：0.8828\n",
      "测试召回率：0.5439\n",
      "测试f1_score：0.6731\n",
      "模型训练耗时：2.141123s\n",
      "**************************************************\n",
      "训练DT\n",
      "训练准确率：0.7695\n",
      "测试准确率：0.7521\n",
      "测试精确率：0.8695\n",
      "测试召回率：0.5947\n",
      "测试f1_score：0.7063\n",
      "模型训练耗时：0.894051s\n",
      "**************************************************\n",
      "训练AdaBoost\n",
      "训练准确率：0.7598\n",
      "测试准确率：0.7566\n",
      "测试精确率：0.8622\n",
      "测试召回率：0.6124\n",
      "测试f1_score：0.7161\n",
      "模型训练耗时：7.987457s\n",
      "**************************************************\n",
      "训练GBDT\n",
      "训练准确率：0.7657\n",
      "测试准确率：0.7634\n",
      "测试精确率：0.8714\n",
      "测试召回率：0.6194\n",
      "测试f1_score：0.7241\n",
      "模型训练耗时：15.412881s\n",
      "**************************************************\n",
      "训练RF\n",
      "训练准确率：0.9810\n",
      "测试准确率：0.7352\n",
      "测试精确率：0.7961\n",
      "测试召回率：0.6343\n",
      "测试f1_score：0.7061\n",
      "模型训练耗时：2.093120s\n",
      "**************************************************\n",
      "训练XGBoost\n",
      "[18:07:31] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "训练准确率：0.8083\n",
      "测试准确率：0.7644\n",
      "测试精确率：0.8527\n",
      "测试召回率：0.6408\n",
      "测试f1_score：0.7317\n",
      "模型训练耗时：14.141809s\n",
      "**************************************************\n"
     ]
    }
   ],
   "source": [
    "# Candidate classifiers to compare, keyed by the label used in the report.\n",
    "# The stochastic learners reuse the train/test split's seed (2021) so that\n",
    "# re-running the notebook reproduces the same comparison table.\n",
    "model_name_param_dict = {\n",
    "    'LR': LogisticRegression(penalty='l2'),\n",
    "    'DT': DecisionTreeClassifier(max_depth=10, min_samples_split=10, random_state=2021),\n",
    "    'AdaBoost': AdaBoostClassifier(random_state=2021),\n",
    "    'GBDT': GradientBoostingClassifier(random_state=2021),\n",
    "    'RF': RandomForestClassifier(random_state=2021),\n",
    "    # logloss is already the default metric for binary:logistic; naming it\n",
    "    # explicitly only silences the notice XGBoost prints on every fit.\n",
    "    'XGBoost': XGBClassifier(eval_metric='logloss'),\n",
    "}\n",
    "\n",
    "result_df = pd.DataFrame(columns=['Accuracy (%)', 'precision(%)', 'recall(%)', 'Time (s)'],\n",
    "                         index=list(model_name_param_dict.keys()))\n",
    "\n",
    "for model_name, model in model_name_param_dict.items():\n",
    "    clf, acc, pre, recall, mean_duration = train_model(X_train, y_train,\n",
    "                                                       X_test, y_test,\n",
    "                                                       model, model_name)\n",
    "    # train_model returns fractions in [0, 1]; the columns are labelled as\n",
    "    # percentages, so scale before storing.\n",
    "    result_df.loc[model_name, 'Accuracy (%)'] = acc * 100\n",
    "    result_df.loc[model_name, 'precision(%)'] = pre * 100\n",
    "    result_df.loc[model_name, 'recall(%)'] = recall * 100\n",
    "    result_df.loc[model_name, 'Time (s)'] = mean_duration\n",
    "\n",
    "# Written to the notebook's working directory.\n",
    "result_df.to_csv('model_comparison.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 网格搜索调优"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[19:15:38] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:15:44] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:15:50] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:15:55] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:16:05] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:16:13] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:16:25] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:16:41] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:16:53] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:17:04] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:17:15] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:17:22] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:17:29] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:17:36] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:17:43] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:17:50] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:18:03] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:18:18] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:18:31] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:18:44] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[19:18:57] WARNING: C:/Users/Administrator/workspace/xgboost-win64_release_1.3.0/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'max_depth': 6, 'n_estimators': 50}"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# XGBoost is chosen as the final model.\n",
    "# The grid is kept small to reduce waiting time.\n",
    "param_grid = {\n",
    "    'n_estimators': [50, 100],\n",
    "    'max_depth': [6, 8],\n",
    "}\n",
    "# logloss is already XGBoost's default metric for binary:logistic; setting it\n",
    "# explicitly stops the notice from being printed once per cross-validation fit.\n",
    "model = XGBClassifier(eval_metric='logloss')\n",
    "grid_search = GridSearchCV(model, param_grid, cv=5, scoring='roc_auc')\n",
    "# fit() returns the search object itself, so `result` is the fitted grid_search.\n",
    "result = grid_search.fit(X_train, y_train)\n",
    "result.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8260450609048304"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Mean cross-validated ROC AUC (5 folds) of the best parameter combination.\n",
    "result.best_score_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8632838466651894 0.6323648758318455\n"
     ]
    }
   ],
   "source": [
    "# Score the refit best estimator on the held-out test set.\n",
    "pre = result.predict(X_test)\n",
    "test_precision = precision_score(y_test, pre)\n",
    "test_recall = recall_score(y_test, pre)\n",
    "print(test_precision, test_recall)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted grid search so it can be reused without retraining.\n",
    "# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in\n",
    "# 0.23, so prefer the standalone joblib package and fall back only on old installs.\n",
    "try:\n",
    "    import joblib\n",
    "except ImportError:\n",
    "    from sklearn.externals import joblib\n",
    "\n",
    "# Save the model\n",
    "joblib.dump(result, 'model.model')\n",
    "\n",
    "# Load the model back (only load pickled files that come from a trusted source)\n",
    "clf = joblib.load('model.model')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, error_score='raise-deprecating',\n",
       "             estimator=XGBClassifier(base_score=None, booster=None,\n",
       "                                     colsample_bylevel=None,\n",
       "                                     colsample_bynode=None,\n",
       "                                     colsample_bytree=None, gamma=None,\n",
       "                                     gpu_id=None, importance_type='gain',\n",
       "                                     interaction_constraints=None,\n",
       "                                     learning_rate=None, max_delta_step=None,\n",
       "                                     max_depth=None, min_child_weight=None,\n",
       "                                     missing=nan, monotone_constrai...\n",
       "                                     objective='binary:logistic',\n",
       "                                     random_state=None, reg_alpha=None,\n",
       "                                     reg_lambda=None, scale_pos_weight=None,\n",
       "                                     subsample=None, tree_method=None,\n",
       "                                     use_label_encoder=True,\n",
       "                                     validate_parameters=None, verbosity=None),\n",
       "             iid='warn', n_jobs=None,\n",
       "             param_grid={'max_depth': [6, 8], 'n_estimators': [50, 100]},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
       "             scoring='roc_auc', verbose=0)"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the object reloaded from 'model.model' to confirm the save/load round trip.\n",
    "clf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
